After some researches about meteostat data nearest station in DE that belongs to Kelheim region are:
there are many of them so I am starting to think about extracting all from the Bayern or extract the nearest from longtitude/latitude point with the Kelheim shapefile(using json and Euclid distances)
Kelheim has no weather station, but it could be reconstructed with 2 other
Hohenfels with id: “10775” and Ingolstadt with id:“10860” kelheim_data = {weight1}x{hohenfels} + {weight2}x{inglstadt}
Also this site shows, that there are many of the Kelheim stations in this area, but meteostat doesn’t contain them https://www.wunderground.com/dashboard/pws/IKELHE5
weatherstack_kelheim = read_delim("data/Kelheim_weather_since_july_2008.csv",delim = ",")
## Rows: 120312 Columns: 6
## ── Column specification ─────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): description
## dbl (4): hour, precip, visibility, totalsnow_daily
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
print(weatherstack_kelheim)
## # A tibble: 120,312 × 6
## date hour description precip visibility totalsnow_daily
## <date> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 2008-07-01 0 Clear 0 10 0
## 2 2008-07-01 100 Clear 0 10 0
## 3 2008-07-01 200 Clear 0 10 0
## 4 2008-07-01 300 Clear 0 10 0
## 5 2008-07-01 400 Clear 0 10 0
## 6 2008-07-01 500 Clear 0 10 0
## 7 2008-07-01 600 Sunny 0 10 0
## 8 2008-07-01 700 Sunny 0 10 0
## 9 2008-07-01 800 Sunny 0 10 0
## 10 2008-07-01 900 Sunny 0 10 0
## # … with 120,302 more rows
## # ℹ Use `print(n = ...)` to see more rows
What to take as a reffer point isn’t clear because of the date(before/after covid) and weather type (sunny,clear,temperature) Also there is no temperature in it :/
global_mobility = read_delim("https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv",",")
de_mobility = global_mobility %>% filter(country_region_code == "DE")
print(unique(de_mobility$sub_region_1))
## [1] NA "Baden-Württemberg" "Bavaria"
## [4] "Berlin" "Brandenburg" "Bremen"
## [7] "Hamburg" "Hessen" "Lower Saxony"
## [10] "Mecklenburg-Vorpommern" "North Rhine-Westphalia" "Rhineland-Palatinate"
## [13] "Saarland" "Saxony" "Saxony-Anhalt"
## [16] "Schleswig-Holstein" "Thuringia"
As we can see the most precise region to filter data from is Bavaria :/
Relevant data for the , mobility
bavaria_mobility = de_mobility %>% filter(sub_region_1 == "Bavaria")
bavaria_mobility = bavaria_mobility %>% select(country_region,sub_region_1,date,residential_percent_change_from_baseline) %>%
mutate(residential_percent_change_from_baseline = -residential_percent_change_from_baseline,
source = "Google")%>%
rename(BundeslandID = sub_region_1,not_at_home_change = residential_percent_change_from_baseline)
bavaria_mobility = bavaria_mobility %>% select(date,BundeslandID,not_at_home_change,source)
#Need to filter out weekends
plt = ggplot(bavaria_mobility)+
geom_point(aes(x = date,y = not_at_home_change))
ggplotly(plt)
snz_mobility = read_delim("data/LK_mobilityData_weekdays.csv",";")
## Rows: 50652 Columns: 4
## ── Column specification ─────────────────────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (1): Landkreis
## dbl (3): date, outOfHomeDuration, percentageChangeComparedToBeforeCorona
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
snz_mobility = snz_mobility %>% filter(Landkreis == "Landkreis Kelheim") %>% mutate(source = "senozon") %>% select(-outOfHomeDuration) %>% rename(not_at_home_change = percentageChangeComparedToBeforeCorona)
snz_mobility$date = as.Date(strptime(snz_mobility$date,"%Y%m%d"))
plt = ggplot(snz_mobility)+
geom_point(aes(x = date,y = not_at_home_change))
ggplotly(plt)
weatherstack_kelheim_daily = weatherstack_kelheim %>%
group_by(date) %>%
summarize(precip_day = sum(precip),visibility_mean = mean(visibility),totalsnow_daily = mean(totalsnow_daily))
weatherstack_kelheim_weekly = weatherstack_kelheim_daily %>%
mutate(year_week = paste0(isoyear(date),"-",isoweek(date))) %>%
group_by(year_week) %>%
summarize(date = first(date), precip_week = sum(precip_day),visibility_mean = mean(visibility_mean),totalsnow_weekly =sum( totalsnow_daily))
weatherstack_kelheim_weekly = unique(weatherstack_kelheim_weekly)
print(weatherstack_kelheim_weekly)
## # A tibble: 717 × 5
## year_week date precip_week visibility_mean totalsnow_weekly
## <chr> <date> <dbl> <dbl> <dbl>
## 1 2008-27 2008-07-01 7.7 9.22 0
## 2 2008-28 2008-07-07 53.7 8.20 0
## 3 2008-29 2008-07-14 14 8.46 0
## 4 2008-30 2008-07-21 4 9.07 0
## 5 2008-31 2008-07-28 9 9.64 0
## 6 2008-32 2008-08-04 13.4 9.52 0
## 7 2008-33 2008-08-11 33 8.55 0
## 8 2008-34 2008-08-18 29.8 9.46 0
## 9 2008-35 2008-08-25 2.5 9.39 0
## 10 2008-36 2008-09-01 26.6 8.10 0
## # … with 707 more rows
## # ℹ Use `print(n = ...)` to see more rows
#mob_joined = rbind(snz_mobility,bavaria_mobility)
snz_mobility_year_week = snz_mobility %>%
mutate(year_week = paste0(isoyear(date),"-",isoweek(date))) %>%
group_by(year_week) %>%
summarize(date = first(date),not_at_home_change = mean(not_at_home_change))
mob_joined_with_weather = snz_mobility_year_week %>% inner_join(weatherstack_kelheim_weekly, by = "year_week") %>% select(-date.y) %>% rename(date = date.x)
print(mob_joined_with_weather)
## # A tibble: 107 × 6
## year_week date not_at_home_change precip_week visibility_mean totalsnow_weekly
## <chr> <date> <dbl> <dbl> <dbl> <dbl>
## 1 2020-10 2020-03-06 0 27.2 8.63 1.4
## 2 2020-11 2020-03-13 0 15.5 8.70 0
## 3 2020-12 2020-03-20 -14 13.3 8.75 7.3
## 4 2020-13 2020-03-27 -23 0 10 0
## 5 2020-14 2020-04-03 -20 0.5 9.99 0
## 6 2020-15 2020-04-10 -18 0.2 9.99 0
## 7 2020-16 2020-04-17 -19 8.8 9.88 0
## 8 2020-17 2020-04-24 -17 0 10 0
## 9 2020-18 2020-05-01 -13 20.4 8.73 0
## 10 2020-19 2020-05-08 -9 8.4 9.76 0
## # … with 97 more rows
## # ℹ Use `print(n = ...)` to see more rows
#First plot with colour as precipitation
plt_color = ggplot(mob_joined_with_weather)+
geom_point(aes(x = date,y = not_at_home_change,colour = precip_week))+
scale_color_gradient2()
ggplotly(plt_color)
#Second plot as another line as precipitation
plt_line = ggplot(mob_joined_with_weather)+
geom_point(aes(x = date,y = not_at_home_change))+
geom_line(aes(x = date,y = precip_week*0.5,color = "red"))
ggplotly(plt_line)